#install.packages("lubridate")
#install.packages("ggplot2")
#install.packages("forecast")
#install.packages("here")
#install.packages("knitr")
#install.packages("kableExtra")
#install.packages("dplyr")
#install.packages("openxlsx")
#install.packages("ggthemes")
#install.packages("tidyr")
#install.packages("GGally")
#install.packages("tseries")
#install.packages("blorr")
#install.packages("car")
#install.packages("corrplot")
#install.packages("dlm")
#install.packages("randomForest")
#install.packages("Kendall")
library(lubridate)
library(ggplot2)
library(forecast) 
library(here)
library(knitr)
library(kableExtra)
library(dplyr)
library(openxlsx)
library(ggthemes)
library(tidyr)
library(Kendall)
library(GGally)
library(trend)
library(tseries)
library(blorr)   
library(lmtest) 
library(car)
library(cowplot)
library(corrplot)
library(dlm)
library(randomForest)
library(e1071) #for SVM
#convert date columns to Date objects
df_load$date<-ymd(df_load$date)
df_temp$date<-ymd(df_temp$date)
df_humid$date<-ymd(df_humid$date)

#select needed columns
df_load_processed<-df_load%>%
  select(meter_id,date,daily_mean)
df_temp_processed<-df_temp[,2:28]
df_humid_processed<-df_humid[,2:28]

#convert measurement columns to numeric
for (i in 2:27){
  df_temp_processed[,i]<-as.numeric(df_temp_processed[,i])
  df_humid_processed[,i]<-as.numeric(df_humid_processed[,i])
}
#create time series objects for load
ts_load<-ts(df_load_processed[,3],start=c(2005,1),frequency=365)
msts_load<-msts(df_load_processed[,3],seasonal.periods=c(7,365),start=c(2005,1))
autoplot(msts_load)

#ACF and PACF for load
par(mfrow=c(1,2))
Acf(msts_load,lag.max=40,main="ACF for load",ylim=c(-0.5,1))
Pacf(msts_load,lag.max=40,main="PACF for load",ylim=c(-0.5,1))

plot_grid(
  autoplot(Acf(ts_load,plot=FALSE,lag.max=40)),
  autoplot(Pacf(ts_load,plot=FALSE,lag.max=40)),
  nrow=1
)

#decompose 
load_decompose<-decompose(msts_load)
plot(load_decompose)

#deseason
load_deseason<-seasadj(load_decompose)
plot_grid(
  autoplot(load_deseason),
  autoplot(Acf(load_deseason,plot=FALSE,lag.max=40)),
  autoplot(Pacf(load_deseason,plot=FALSE,lag.max=40)),
  nrow=1
)

#Mann-Kendall
summary(MannKendall(msts_load)) #significant but weak positive trend (tau = 0.039, p = 0.005)
## Score =  106325 , Var(Score) = 1428222848
## denominator =  2741282
## tau = 0.0388, 2-sided pvalue =0.0049019
#ADF test
print(adf.test(msts_load,alternative = "stationary")) #null of a unit root is rejected: series is stationary
## Warning in adf.test(msts_load, alternative = "stationary"): p-value smaller
## than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  msts_load
## Dickey-Fuller = -5.3158, Lag order = 13, p-value = 0.01
## alternative hypothesis: stationary
#find out how many seasonal differences are needed
ns_diff<-nsdiffs(ts_load)
cat("Number of seasonal differencing needed:", ns_diff) #no need to difference the series
## Number of seasonal differencing needed: 0
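#As a complementary check (a minimal sketch, not part of the original run), ndiffs() from the
#forecast package suggests how many regular (non-seasonal) differences a unit-root test would apply.
n_diff <- ndiffs(ts_load)
cat("Number of non-seasonal differences suggested:", n_diff)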

#Use the one-year testing set to find the best model for a one-month forecast.

#training: from 2005-01-01 to 2010-05-30
#testing: from 2010-05-31 to 2011-05-31

#original series
load_data <- df_load_processed[,3]


#splitting training and testing 
nobs <- length(load_data)
nfor <- 365
msts_training<-subset(msts_load, end=length(msts_load)-nfor)
msts_testing<-subset(msts_load, start=length(msts_load)-nfor+1)

msts_deseason_training<-subset(load_deseason, end=length(msts_load)-nfor)
msts_deseason_testing<-subset(load_deseason, start=length(msts_load)-nfor+1)
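#A quick sanity check (a small addition, assuming the split above is positional): the training and
#testing pieces should partition the full series, and the boundary dates should match the comment above.
length(msts_training) + length(msts_testing) == length(msts_load)
cat("Training ends on:", format(df_load_processed$date[length(msts_training)]),
    "| Testing starts on:", format(df_load_processed$date[length(msts_training)+1]))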

##ETS

ETS_fit<-stlf(msts_training,h=365)
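#stlf() above is a shortcut; as a rough equivalent (a sketch only, assuming the default ETS on the
#seasonally adjusted series), the same forecast can be built explicitly with stlm() + forecast().
ETS_fit_explicit <- stlm(msts_training, method = "ets")
ETS_forecast_explicit <- forecast(ETS_fit_explicit, h = 365)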

##ARIMA

#K=c(2,4)
#fit ARIMA with original time series 
ARIMA_Four_fit_1<-auto.arima(msts_training,
                             seasonal=FALSE, 
                             xreg=fourier(msts_training,K=c(2,4)))
#forecast with ARIMA_fit
ARIMA_Four_forecast_1<-forecast(ARIMA_Four_fit_1,
                                xreg=fourier(msts_training,K=c(2,4),h=365),
                                h=365) 

#fit ARIMA with deseason time series 
#ARIMA_Four_fit_1<-auto.arima(msts_deseason_training,
                            # seasonal=FALSE, 
                            # xreg=fourier(msts_deseason_training,K=c(2,6)))
#forecast with ARIMA_fit
#ARIMA_Four_forecast_1<-forecast(ARIMA_Four_fit_1,
                              #  xreg=fourier(msts_deseason_training,K=c(2,6),h=31),
                             #   h=31) 

# Add seasonality back
#season_start <- length(load_decompose$seasonal)-364
#season_end <- length(load_decompose$seasonal)-364 + 30
#ARIMA_forecast <- ARIMA_Four_forecast_1$mean + load_decompose$seasonal[season_start:season_end]

#K=c(2,12)
#fit ARIMA with original time series 
ARIMA_Four_fit_2<-auto.arima(msts_training,
                             seasonal=FALSE, 
                             xreg=fourier(msts_training,K=c(2,12)))
#forecast with ARIMA_Four_fit_2
ARIMA_Four_forecast_2<-forecast(ARIMA_Four_fit_2,
                                xreg=fourier(msts_training,K=c(2,12),h=365),
                                h=365) 
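#The yearly Fourier order K is picked ad hoc above (4 and then 12); a minimal sketch (the candidate
#values here are illustrative assumptions) for comparing orders by AICc on the training set:
K_candidates <- c(4, 6, 12)
aicc_by_K <- sapply(K_candidates, function(k){
  auto.arima(msts_training, seasonal = FALSE,
             xreg = fourier(msts_training, K = c(2, k)))$aicc
})
names(aicc_by_K) <- paste0("K=c(2,", K_candidates, ")")
print(aicc_by_K)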
#temp
#create time series object 
msts_temp<-msts(df_temp_processed[,2], seasonal.periods=c(7,365),start=c(2005,1))
msts_temp_training<-subset(msts_temp, end=length(msts_temp)-nfor)
msts_temp_testing<-subset(msts_temp, start=length(msts_temp)-nfor+1)

#humid
#create time series object 
msts_humid<-msts(df_humid_processed[,2], seasonal.periods=c(7,365),start=c(2005,1))
msts_humid_training<-subset(msts_humid, end=length(msts_humid)-nfor)
msts_humid_testing<-subset(msts_humid, start=length(msts_humid)-nfor+1)

#K=c(2,4)
#fit ARIMA with original time series 
ARIMA_Four_fit_XREG_1<-auto.arima(msts_training,
                                  seasonal=FALSE,
                                  #combine fourier terms and the exogenous temperature series
                                  xreg=cbind(fourier(msts_training,K=c(2,4)),
                                             temp=as.numeric(msts_temp_training)))

#generate fourier terms for the forecast horizon
fourier_terms_1<-fourier(msts_training, K=c(2,4), h = 365)
#forecast the exogenous temperature series
temp_forecast_1<-forecast(msts_temp_training, h = 365)
#combine fourier terms and the temperature forecast, matching the column name used in training
xreg_matrix_1<-cbind(fourier_terms_1, temp=as.numeric(temp_forecast_1$mean))

#forecast with ARIMA_fit_XREG
ARIMA_Four_forecast_XREG_1<-forecast(ARIMA_Four_fit_XREG_1,
                                   xreg = xreg_matrix_1,
                                   h = 365)
#K=c(2,12)
#fit ARIMA with original time series 
ARIMA_Four_fit_XREG_2<-auto.arima(msts_training,
                                  seasonal=FALSE,
                                  #combine fourier terms and the exogenous temperature series
                                  xreg=cbind(fourier(msts_training,K=c(2,12)),
                                             temp=as.numeric(msts_temp_training)))

#generate fourier terms for the forecast horizon
fourier_terms_2<-fourier(msts_training, K=c(2,12), h = 365)
#forecast the exogenous temperature series
temp_forecast_2<-forecast(msts_temp_training, h = 365)
#combine fourier terms and the temperature forecast, matching the column name used in training
xreg_matrix_2<-cbind(fourier_terms_2, temp=as.numeric(temp_forecast_2$mean))

#forecast with ARIMA_fit_XREG
ARIMA_Four_forecast_XREG_2<-forecast(ARIMA_Four_fit_XREG_2,
                                   xreg = xreg_matrix_2,
                                   h = 365)
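#The humidity series is prepared above but never enters a model; a minimal sketch (an assumption,
#not fitted in the original analysis) of an ARIMA + Fourier model with both weather regressors:
xreg_weather_train <- cbind(fourier(msts_training, K = c(2, 4)),
                            temp = as.numeric(msts_temp_training),
                            humid = as.numeric(msts_humid_training))
ARIMA_weather_fit <- auto.arima(msts_training, seasonal = FALSE, xreg = xreg_weather_train)
xreg_weather_future <- cbind(fourier(msts_training, K = c(2, 4), h = 365),
                             temp = as.numeric(forecast(msts_temp_training, h = 365)$mean),
                             humid = as.numeric(forecast(msts_humid_training, h = 365)$mean))
ARIMA_weather_forecast <- forecast(ARIMA_weather_fit, xreg = xreg_weather_future, h = 365)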

##NN

#p=1, P=0: seasonality is handled by the Fourier regressors rather than seasonal lags

#K=c(2,4)
NN_fit_1<-nnetar(msts_training,
                 p=1,
                 P=0,
                 xreg=fourier(msts_training,K=c(2,4)))

NN_forecast_1<-forecast(NN_fit_1,
                        xreg=fourier(msts_training,K=c(2,4),h=365), 
                        h=365)
#Add seasonality back
#NN_forecast <- NN_forecast_1$mean + load_decompose$seasonal[season_start:season_end]


#K=c(2,6)
NN_fit_2<-nnetar(msts_training,
                 p=1,
                 P=0,
                 xreg=fourier(msts_training,K=c(2,6)))

NN_forecast_2<-forecast(NN_fit_2,
                        xreg=fourier(msts_training,K=c(2,6),h=365), 
                        h=365)


#K=c(2,12)
NN_fit_3<-nnetar(msts_training,
                 p=1,
                 P=0,
                 xreg=fourier(msts_training,K=c(2,12)))

NN_forecast_3<-forecast(NN_fit_3,
                        xreg=fourier(msts_training,K=c(2,12),h=365), 
                        h=365)
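#NN_forecast_3 (K=c(2,12)) is not included in the comparison table below; a one-line sketch
#(an addition, not in the original scoring) to put it on the same footing as the other models:
scores_NN_3 <- accuracy(NN_forecast_3$mean, msts_testing)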

#One-month forecast horizon

##NN Optimization

# Training and test sets
n_train <- round(0.8*length(load_data))
n_test <- length(load_data)-n_train
train <- subset(ts_load, end=n_train)
test <- subset(ts_load, start=n_train+1)

nn_acc_min <- 40
p_opt <- 0
P_opt <-0

for (p in 1:8){
  print(p)
  for (P in 0:3){
    nn_train <- nnetar(train,p=p,P=P)
    nn_testfor <- forecast(nn_train,h=n_test)
    nn_acc <- accuracy(nn_testfor,test)
    #nn_acc[9] and nn_acc[10] are the training and test MAPE (column 5 of the accuracy matrix)
    if ((nn_acc[9] + nn_acc[10]) < nn_acc_min){
      nn_acc_min <- nn_acc[9] + nn_acc[10]
      p_opt <- p
      P_opt <- P
    }
  }
}
## [1] 1
## [1] 2
## [1] 3
## [1] 4
## [1] 5
## [1] 6
## [1] 7
## [1] 8
cat("Minimum combined (training + test) MAPE:", nn_acc_min, "at p =", p_opt, ", P =", P_opt)
## [1] 32
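#The positional indices nn_acc[9]/nn_acc[10] above depend on accuracy()'s column order; a small
#sketch (an addition) that refits at the selected orders and reads the MAPEs by row/column name:
nn_best <- nnetar(train, p = p_opt, P = P_opt)
nn_best_acc <- accuracy(forecast(nn_best, h = n_test), test)
cat("Training MAPE:", nn_best_acc["Training set", "MAPE"],
    "| Test MAPE:", nn_best_acc["Test set", "MAPE"])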

Optimized NN

#Ina: this chunk does not work for me (it referenced petrol_test; the test object is named test)
#forecast for one month
# Fit optimized model
#nn_train_opt <- nnetar(train,p=p_opt,P=P_opt)
#nn_testfor_opt <- forecast(nn_train_opt,h=n_test)
#nn_acc_opt <- accuracy(nn_testfor_opt,test)
#cat("Optimized Neural Network: p =",p_opt, ", P =",P_opt,", Training Accuracy (MAPE):", nn_acc_opt[9],", Test Accuracy (MAPE):", nn_acc_opt[10])

# Train on the training set and forecast for 31 days
#nn_fit <- nnetar(msts_training,p=p_opt,P=P_opt)
#nn_for <- forecast(nn_fit, h=31)

TBATS (for one-month forecast horizon)

#forecast for one month
# Fit TBATS model
tbats_fit <- tbats(msts_training)
tbats_for <- forecast(tbats_fit, h=31)
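#A short sketch (an addition, assuming the first 31 held-out days are the comparison window):
#score the one-month TBATS forecast against the start of the testing year.
scores_TBATS <- accuracy(tbats_for$mean, msts_testing[1:31])
print(scores_TBATS)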

#Plot results

autoplot(ts(load_data), series="Time Series History") +
  #autolayer(nn_for$fitted, series = "NN Fit") +
  #autolayer(ts(tbats_for$fitted), series = "TBATS Fit") +
  #autolayer(nn_for$mean, series = "NN Forecast") +
  autolayer(ts(tbats_for$mean, start=length(msts_training)+1), series = "TBATS Forecast") +
  xlab("Day") + ylab("Load") +
  ggtitle("TBATS Load Forecast (one-month horizon)") 

#forecast period 
forecast_start<-start(ARIMA_Four_forecast_1$mean)
forecast_end<-end(ARIMA_Four_forecast_1$mean)
msts_forecast<-window(msts_load, start=forecast_start, end=forecast_end)

#plot forecasting result


#add the seasonality back when using deseason series
#ETS_fit$mean<-ETS_fit$mean+load_decompose$seasonal
#ARIMA_Four_forecast_1$mean<-ARIMA_Four_forecast_1$mean+load_decompose$seasonal
#NN_forecast_1$mean<-NN_forecast_1$mean+load_decompose$seasonal
#ARIMA_Four_forecast_XREG_1$mean<-ARIMA_Four_forecast_XREG_1$mean+load_decompose$seasonal

#with K=c(2,4)
autoplot(msts_forecast)+
  autolayer(ETS_fit,series="STL+ETS",PI=FALSE)+
  autolayer(ARIMA_Four_forecast_1$mean, series="ARIMA Fourier")+
  autolayer(ARIMA_Four_forecast_XREG_1$mean, series="ARIMA Fourier XREG")+
  autolayer(NN_forecast_1$mean, series="Neural Network")

#with K=c(2,12) / K=c(2,6)
autoplot(msts_forecast)+
  autolayer(ETS_fit,series="STL+ETS",PI=FALSE)+
  autolayer(ARIMA_Four_forecast_2, series="ARIMA Fourier",PI=FALSE)+
  autolayer(ARIMA_Four_forecast_XREG_2, series="ARIMA Fourier XREG",PI=FALSE)+
  autolayer(NN_forecast_2, series="Neural Network") #K=c(2,6)

resi_ETS<-checkresiduals(ETS_fit)

## 
##  Ljung-Box test
## 
## data:  Residuals from STL +  ETS(A,N,N)
## Q* = 1375.7, df = 395, p-value < 2.2e-16
## 
## Model df: 0.   Total lags used: 395
resi_ARIMA_Four_1<-checkresiduals(ARIMA_Four_forecast_1)

## 
##  Ljung-Box test
## 
## data:  Residuals from Regression with ARIMA(1,1,1) errors
## Q* = 636.85, df = 393, p-value = 7.871e-14
## 
## Model df: 2.   Total lags used: 395
resi_ARIMA_Four_2<-checkresiduals(ARIMA_Four_forecast_2)

## 
##  Ljung-Box test
## 
## data:  Residuals from Regression with ARIMA(1,1,1) errors
## Q* = 640.54, df = 393, p-value = 3.819e-14
## 
## Model df: 2.   Total lags used: 395
resi_ARIMA_Four_XREG_1<-checkresiduals(ARIMA_Four_forecast_XREG_1)

## 
##  Ljung-Box test
## 
## data:  Residuals from Regression with ARIMA(2,1,1) errors
## Q* = 443.85, df = 392, p-value = 0.03597
## 
## Model df: 3.   Total lags used: 395
resi_ARIMA_Four_XREG_2<-checkresiduals(ARIMA_Four_forecast_XREG_2)

## 
##  Ljung-Box test
## 
## data:  Residuals from Regression with ARIMA(2,1,1) errors
## Q* = 443.64, df = 392, p-value = 0.03653
## 
## Model df: 3.   Total lags used: 395
resi_NN_forecast_1<-checkresiduals(NN_forecast_1)

## 
##  Ljung-Box test
## 
## data:  Residuals from NNAR(1,7)
## Q* = 644.4, df = 395, p-value = 2.942e-14
## 
## Model df: 0.   Total lags used: 395
resi_NN_forecast_2<-checkresiduals(NN_forecast_2) 

## 
##  Ljung-Box test
## 
## data:  Residuals from NNAR(1,15)
## Q* = 983.84, df = 395, p-value < 2.2e-16
## 
## Model df: 0.   Total lags used: 395
#Model1:ETS
scores_ETS<-accuracy(ETS_fit$mean,msts_testing)

#Model2: ARIMA + Fourier, K=c(2,4) 
scores_ARIMA_Four_forecast_1<-accuracy(ARIMA_Four_forecast_1$mean,msts_testing)

#Model3: ARIMA + Fourier, K=c(2,12)
scores_ARIMA_Four_forecast_2<-accuracy(ARIMA_Four_forecast_2$mean,msts_testing)

#Model4: Neural network, K=c(2,4) 
scores_NN_1<-accuracy(NN_forecast_1$mean,msts_testing)

#Model5: Neural network, K=c(2,6)
scores_NN_2<-accuracy(NN_forecast_2$mean,msts_testing)

#Model6: ARIMA + Fourier + XREG, K=c(2,4) 
scores_ARIMA_Four_XREG_1<-accuracy(ARIMA_Four_forecast_XREG_1$mean,msts_testing)

#Model7: ARIMA + Fourier + XREG, K=c(2,12)
scores_ARIMA_Four_XREG_2<-accuracy(ARIMA_Four_forecast_XREG_2$mean,msts_testing)

#create a data frame to store the scores 
scores<-as.data.frame(rbind(scores_ETS,
                            scores_ARIMA_Four_forecast_1,
                            scores_ARIMA_Four_forecast_2,
                            scores_ARIMA_Four_XREG_1,
                            scores_ARIMA_Four_XREG_2,
                            scores_NN_1,
                            scores_NN_2))
row.names(scores)<-c("ETS",
                     "ARIMA+Fourier, K=C(2,4)",
                     "ARIMA+Fourier, K=C(2,12)",
                     "ARIMA+Fourier+XREG, K=C(2,4)",
                     "ARIMA+Fourier+XREG, K=C(2,12)",
                     "NN, K=C(2,4)",
                     "NN, K=C(2,6)")

#create a comparison table 
kbl(scores, 
    caption = "Forecast Accuracy for Electricity Demand",
    digits = array(5,ncol(scores))) %>%
  kable_styling(full_width = FALSE, position = "center", latex_options = "hold_position")
Forecast Accuracy for Electricity Demand

                                       ME       RMSE        MAE       MPE      MAPE     ACF1  Theil's U
ETS                              84.34916   759.3052   580.6240  -1.42490  14.89561  0.74624    1.43490
ARIMA+Fourier, K=C(2,4)          99.27076   725.1554   540.9276  -1.05062  13.81881  0.77667    1.36068
ARIMA+Fourier, K=C(2,12)        190.83584   735.3196   536.6225   1.59532  13.19891  0.77211    1.32451
ARIMA+Fourier+XREG, K=C(2,4)   2215.52568  2609.1714  2236.9484  60.81112  61.51566  0.92701    6.00400
ARIMA+Fourier+XREG, K=C(2,12)  2195.91287  2601.4314  2220.2837  60.35767  61.17022  0.92918    6.01038
NN, K=C(2,4)                    255.27192   723.1842   529.0527   3.70096  12.85880  0.73693    1.29377
NN, K=C(2,6)                    239.44901   796.4350   585.9143   3.15991  14.48496  0.74806    1.46118
#Model 3 (ARIMA+Fourier, K=c(2,12)) scores among the lowest on RMSE and MAPE and is carried forward for the July forecast
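#A small sketch (an addition): pull the best-scoring rows out of the scores table programmatically
#rather than reading them off by eye.
cat("Lowest RMSE:", rownames(scores)[which.min(scores[,"RMSE"])], "\n")
cat("Lowest MAPE:", rownames(scores)[which.min(scores[,"MAPE"])], "\n")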
# Mean July 2011 weather, assuming the daily values sit in the numeric columns (adjust if needed)
july_idx_temp <- df_temp_processed$date >= as.Date("2011-07-01") & df_temp_processed$date <= as.Date("2011-07-31")
july_idx_humid <- df_humid_processed$date >= as.Date("2011-07-01") & df_humid_processed$date <= as.Date("2011-07-31")
temp_july <- mean(as.matrix(df_temp_processed[july_idx_temp, sapply(df_temp_processed, is.numeric)]), na.rm = TRUE)
humid_july <- mean(as.matrix(df_humid_processed[july_idx_humid, sapply(df_humid_processed, is.numeric)]), na.rm = TRUE)

# Generate Fourier terms for a 31-day horizon
fourier_terms_july <- fourier(msts_training, K = c(2, 12), h = 31)

# Forecast with the fitted Model 3; it was trained with Fourier terms only, so those are the only
# regressors supplied here (temp_july and humid_july are kept for reference)
forecast_july <- forecast(ARIMA_Four_fit_2,
                          xreg = fourier_terms_july,
                          h = 31)

# Plot the forecast for July 2011
autoplot(forecast_july) +
  labs(title = "Forecast for Load Data - July 2011")

#Final forecast

#Train over the whole dataset for the final forecast
msts_training_all<-msts(load_data,seasonal.periods=c(7,365),start=c(2005,1))

NN_fit_final<-nnetar(msts_training_all,
                 p=1,
                 P=0,
                 xreg=fourier(msts_training_all,K=c(2,6)))

NN_forecast_final<-forecast(NN_fit_final,
                        xreg=fourier(msts_training_all,K=c(2,6),h=31), 
                        h=31)
autoplot(NN_forecast_final)
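#openxlsx is loaded at the top but never used; a final sketch (file name and column layout are
#assumptions) writing the 31-day forecast out to a spreadsheet:
final_out <- data.frame(date = max(df_load_processed$date) + 1:31,
                        load_forecast = as.numeric(NN_forecast_final$mean))
write.xlsx(final_out, "final_forecast.xlsx")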